from jieba import analyse
import pandas as pd

# Bind jieba's TextRank keyword-extraction entry point to a short name.
textrank = analyse.textrank

# Load the data.
df_train = pd.read_csv('../data/train_processed.csv')
# df_train = pd.read_csv('../data/test.csv')
df_test = pd.read_csv('../data/test_processed.csv')


# Descriptions of the training and test sets stacked into one Series.
# NOTE(review): nothing in the visible code reads `all_description`; it is
# kept so that any code relying on this module-level name keeps working.
all_description = pd.concat([df_train['description'], df_test['description']], axis=0)

# Build one document per class by joining, with single spaces, every training
# description that shares the same value in the 'lable' column ('lable' is the
# runtime column key this script reads, so it is kept exactly as written).
# pandas parses empty CSV fields as NaN (a float) and str.join raises
# TypeError on non-string items, so missing values are dropped and the
# remaining ones coerced to str before joining.
df_class = (
    df_train[['lable', 'description']]
    .groupby(['lable'])
    .aggregate(lambda x: ' '.join(x.dropna().astype(str)))
)

# Print the top-50 TextRank keywords of each per-class document.
for class_line in df_class['description']:
    # print(class_line)
    keywords = textrank(class_line, topK=50)
    print(keywords)
